Unbalanced panel of 531 NBA players from 2013 to 2018. The outcome is Value Over Replacement Player (VORP); there are 148 features before preprocessing.

# Pairwise correlation matrix of the numeric features
cor_num <- cor(df_num_mean)

# Column indices that findCorrelation() suggests removing because they are
# highly correlated with other features (cutoff 0.95)
cor_high <- findCorrelation(cor_num, cutoff = 0.95)

# Names of the flagged columns (the removal itself is not done in this chunk).
# Index the name vector rather than the data frame: df[, i] collapses to a
# bare vector when exactly one column is flagged, and names() of that vector
# is NULL instead of the column name.
names(df_num_mean)[cor_high]
##  [1] "FGA3_t1"   "FTA_t1"    "TRB_t1"    "PSG_t1"    "FGA3_t2"  
##  [6] "FTA_t2"    "TRB_t2"    "PSG_t2"    "FGA3_t3"   "FTA_t3"   
## [11] "TRB_t3"    "PSG_t3"    "TRBPer_t1" "WS_t1"     "TRBPer_t2"
## [16] "FG_t1"     "FG2_t1"    "Age_t1"    "FG_t2"     "FG2_t2"   
## [21] "Age_t2"    "FG_t3"     "FG2_t3"
# Per-variable summary of the training features (x_mean_train is the
# predictor set passed to train() below)
skim(x_mean_train)
## Skim summary statistics
##  n obs: 988 
##  n variables: 121 
## 
## ── Variable type:factor ─────────────────────────────────────────────────────────────────────────
##  variable missing complete   n n_unique
##    Pos_t1       0      988 988        5
##    Pos_t2       0      988 988        5
##    Pos_t3       0      988 988        5
##     Tm_t1       0      988 988       31
##     Tm_t2       0      988 988       31
##     Tm_t3       0      988 988       31
##                           top_counts ordered
##    PF: 223, SG: 210, PG: 192, C: 183   FALSE
##   PF: 228, SG: 220, PG: 193, SF: 175   FALSE
##   PF: 227, SG: 217, PG: 196, SF: 177   FALSE
##  TOT: 107, GSW: 36, TOR: 36, CHI: 35   FALSE
##   TOT: 86, HOU: 35, TOR: 35, BOS: 34   FALSE
##   TOT: 98, UTA: 37, DEN: 35, HOU: 35   FALSE
## 
## ── Variable type:numeric ────────────────────────────────────────────────────────────────────────
##   variable missing complete   n   mean     sd     p0    p25    p50   p75
##     Age_t3       0      988 988 24.18   4.19   17    21     24     27   
##    Ar3P_t1       0      988 988  0.29   0.21    0     0.099  0.3    0.44
##    Ar3P_t2       0      988 988  0.27   0.19    0     0.12   0.27   0.39
##    Ar3P_t3       0      988 988  0.25   0.17    0     0.16   0.25   0.33
##     AST_t1       0      988 988  2.03   1.84    0     0.8    1.4    2.6 
##     AST_t2       0      988 988  2.19   1.77    0     1      2      2.4 
##     AST_t3       0      988 988  2.3    1.7     0     1.1    2.28   2.3 
##  ASTPer_t1       0      988 988 13.43   9.29    0     7.1   10.35  17.9 
##  ASTPer_t2       0      988 988 13.75   8.75    0     7.8   12.35  15.83
##  ASTPer_t3       0      988 988 14.02   8.15    0     8.5   13.99  14.53
##     BLK_t1       0      988 988  0.45   0.45    0     0.2    0.3    0.6 
##     BLK_t2       0      988 988  0.47   0.42    0     0.2    0.4    0.5 
##     BLK_t3       0      988 988  0.5    0.41    0     0.2    0.5    0.5 
##  BLKPer_t1       0      988 988  1.69   1.64    0     0.6    1.2    2.23
##  BLKPer_t2       0      988 988  1.68   1.52    0     0.7    1.4    1.8 
##  BLKPer_t3       0      988 988  1.68   1.3     0     0.8    1.69   1.69
##     BPM_t1       0      988 988 -0.79   3.28  -20.8  -2.6   -0.8    0.9 
##     BPM_t2       0      988 988 -0.5    2.83  -18.9  -1.72  -0.51   0.8 
##     BPM_t3       0      988 988 -0.35   2.7   -20.1  -1.2   -0.4    0.6 
##    DBPM_t1       0      988 988 -0.13   1.9   -10.2  -1.3   -0.1    1   
##    DBPM_t2       0      988 988 -0.071  1.64   -8.2  -1.1   -0.066  0.8 
##    DBPM_t3       0      988 988 -0.077  1.55  -13.2  -0.7   -0.094  0.6 
##     DRB_t1       0      988 988  3.01   1.76    0     1.78   2.7    3.9 
##     DRB_t2       0      988 988  3.18   1.61    0     2.1    3.18   3.7 
##     DRB_t3       0      988 988  3.21   1.5     0     2.3    3.21   3.5 
##  DRBPer_t1       0      988 988 15.32   6.86    0    10.5   14.1   19.3 
##  DRBPer_t2       0      988 988 15.15   5.61    0    10.97  15.17  17.9 
##  DRBPer_t3       0      988 988 14.93   5.06    0    11.5   14.98  16.7 
##     DWS_t1       0      988 988  1.46   1.12   -0.1   0.6    1.3    2.1 
##     DWS_t2       0      988 988  1.62   1.07   -0.1   0.9    1.62   2.1 
##     DWS_t3       0      988 988  1.73   1.03   -0.1   1.1    1.7    2.1 
##     eFG_t1       0      988 988  0.5    0.072   0     0.47   0.5    0.54
##     eFG_t2       0      988 988  0.5    0.057   0     0.47   0.5    0.53
##     eFG_t3       0      988 988  0.5    0.047   0.22  0.48   0.5    0.51
##  FG2Per_t1       0      988 988  0.49   0.081   0     0.45   0.48   0.53
##  FG2Per_t2       0      988 988  0.48   0.065   0     0.46   0.48   0.51
##  FG2Per_t3       0      988 988  0.48   0.055   0     0.46   0.48   0.5 
##     FG3_t1       0      988 988  0.8    0.76    0     0.1    0.6    1.3 
##     FG3_t2       0      988 988  0.78   0.68    0     0.2    0.77   1.1 
##     FG3_t3       0      988 988  0.77   0.63    0     0.3    0.76   1   
##  FG3Per_t1       0      988 988  0.31   0.12    0     0.29   0.33   0.37
##  FG3Per_t2       0      988 988  0.31   0.12    0     0.3    0.31   0.36
##  FG3Per_t3       0      988 988  0.3    0.11    0     0.3    0.3    0.36
##     FGA_t1       0      988 988  7.77   4.47    0     4.47   6.8   10.5 
##     FGA_t2       0      988 988  8.27   4       0     5.3    8.25  10.2 
##     FGA_t3       0      988 988  8.61   3.67    1     6.4    8.55  10   
##    FGA2_t1       0      988 988  5.53   3.58    0     2.9    4.7    7.8 
##    FGA2_t2       0      988 988  6.08   3.35    0     3.7    6.08   7.62
##    FGA2_t3       0      988 988  6.45   3.14    0.2   4.5    6.43   7.3 
##   FGPer_t1       0      988 988  0.45   0.079   0     0.41   0.44   0.49
##   FGPer_t2       0      988 988  0.45   0.063   0     0.42   0.45   0.47
##   FGPer_t3       0      988 988  0.45   0.052   0.22  0.43   0.45   0.47
##      FT_t1       0      988 988  1.62   1.43    0     0.7    1.2    2.1 
##      FT_t2       0      988 988  1.76   1.29    0     0.9    1.7    2   
##      FT_t3       0      988 988  1.85   1.2     0     1.1    1.84   1.9 
##   FTPer_t1       0      988 988  0.74   0.13    0     0.68   0.76   0.82
##   FTPer_t2       0      988 988  0.74   0.11    0     0.71   0.74   0.81
##   FTPer_t3       0      988 988  0.75   0.093   0     0.73   0.75   0.8 
##     FTr_t1       0      988 988  0.27   0.15    0     0.18   0.25   0.35
##     FTr_t2       0      988 988  0.28   0.13    0     0.2    0.28   0.32
##     FTr_t3       0      988 988  0.28   0.11    0     0.22   0.28   0.3 
##       G_t1       0      988 988 59.67  21.28    1    47     67     76   
##       G_t2       0      988 988 62.93  17.36    1    59     64     76   
##       G_t3       0      988 988 64.6   15.41    1    64     64.22  76   
##      GS_t1       0      988 988 30.28  29.53    0     2     19     60   
##      GS_t2       0      988 988 34.11  27.43    0     7     34.03  59.25
##      GS_t3       0      988 988 37.02  25.61    0    15     36.5   58   
##      MP_t1       0      988 988 22.08   8.67    0.7  15.67  22.2   29.3 
##      MP_t2       0      988 988 23.46   7.72    2    18.67  23.42  29.42
##      MP_t3       0      988 988 24.32   7.2     2.5  21.48  24.22  28.83
##    OBPM_t1       0      988 988 -0.66   2.77  -16.4  -2     -0.6    0.6 
##    OBPM_t2       0      988 988 -0.43   2.41  -14.3  -1.5   -0.45   0.6 
##    OBPM_t3       0      988 988 -0.27   2.28  -16.7  -1.1   -0.3    0.5 
##     ORB_t1       0      988 988  0.97   0.81    0     0.4    0.7    1.4 
##     ORB_t2       0      988 988  1.05   0.77    0     0.5    1      1.3 
##     ORB_t3       0      988 988  1.12   0.76    0     0.6    1.12   1.2 
##  ORBPer_t1       0      988 988  5.12   4       0     2      3.6    7.82
##  ORBPer_t2       0      988 988  5.23   3.64    0     2.2    5.1    7   
##  ORBPer_t3       0      988 988  5.55   3.65    0     2.8    5.53   6.3 
##     OWS_t1       0      988 988  1.67   2.15   -3.3   0.2    1.1    2.42
##     OWS_t2       0      988 988  1.92   2.03   -3.3   0.6    1.9    2.4 
##     OWS_t3       0      988 988  2.12   2      -2.7   1      2.09   2.3 
##     PER_t1       0      988 988 14.13   5.05  -11.4  11     13.8   16.8 
##     PER_t2       0      988 988 14.47   4.32  -11.4  12.2   14.47  16.22
##     PER_t3       0      988 988 14.79   3.84   -7.5  13     14.77  15.9 
##      PF_t1       0      988 988  1.86   0.7     0     1.4    1.9    2.3 
##      PF_t2       0      988 988  1.96   0.6     0     1.6    1.96   2.3 
##      PF_t3       0      988 988  2      0.56    0     1.8    1.99   2.3 
##     STL_t1       0      988 988  0.72   0.43    0     0.4    0.6    1   
##     STL_t2       0      988 988  0.77   0.41    0     0.5    0.76   0.9 
##     STL_t3       0      988 988  0.79   0.39    0     0.6    0.78   0.9 
##  STLPer_t1       0      988 988  1.61   0.76    0     1.2    1.5    2   
##  STLPer_t2       0      988 988  1.63   0.66    0     1.2    1.61   1.9 
##  STLPer_t3       0      988 988  1.64   0.6     0     1.3    1.62   1.8 
##     TOV_t1       0      988 988  1.25   0.8     0     0.7    1.1    1.6 
##     TOV_t2       0      988 988  1.37   0.74    0     0.9    1.36   1.6 
##     TOV_t3       0      988 988  1.44   0.68    0     1      1.43   1.6 
##  TOVPer_t1       0      988 988 12.79   4.27    0    10     12.6   15.2 
##  TOVPer_t2       0      988 988 13.07   3.58    0    10.8   13.07  14.8 
##  TOVPer_t3       0      988 988 13.21   3.2     0    11.67  13.23  14.1 
##  TRBPer_t3       0      988 988 10.25   3.97    0     7.3   10.26  11.6 
##      TS_t1       0      988 988  0.53   0.069   0     0.51   0.54   0.57
##      TS_t2       0      988 988  0.53   0.055   0     0.51   0.53   0.56
##      TS_t3       0      988 988  0.53   0.045   0.26  0.52   0.53   0.55
##     USG_t1       0      988 988 19.11   5.32    0    15.3   18.5   22.2 
##     USG_t2       0      988 988 19.42   4.56    0    16.5   19.42  21.72
##     USG_t3       0      988 988 19.86   4.17    8.6  17.7   19.82  21.4 
##    VORP_t1       0      988 988  0.81   1.48   -1.4  -0.1    0.3    1.3 
##    VORP_t2       0      988 988  0.95   1.36   -1.4   0.075  0.9    1.3 
##    VORP_t3       0      988 988  1.09   1.32   -1.6   0.27   1.05   1.2 
##      WS_t2       0      988 988  3.53   2.78   -2.1   1.6    3.53   4.4 
##      WS_t3       0      988 988  3.86   2.7    -1.5   2.3    3.8    4.4 
##    WS48_t1       0      988 988  0.093  0.068  -0.3   0.055  0.091  0.13
##    WS48_t2       0      988 988  0.096  0.059  -0.3   0.069  0.096  0.12
##    WS48_t3       0      988 988  0.099  0.053  -0.29  0.082  0.098  0.12
##    p100     hist
##   38    ▃▇▅▆▃▁▁▁
##    1    ▇▅▆▆▃▁▁▁
##    1    ▆▃▇▃▂▁▁▁
##    0.87 ▅▂▇▂▂▁▁▁
##   11.2  ▇▅▂▁▁▁▁▁
##   11.7  ▇▇▂▁▁▁▁▁
##   11.1  ▅▇▂▁▁▁▁▁
##   57.3  ▅▇▃▂▁▁▁▁
##   52.7  ▃▇▇▂▂▁▁▁
##   49.3  ▂▅▇▁▁▁▁▁
##    3.7  ▇▃▁▁▁▁▁▁
##    3.7  ▇▆▁▁▁▁▁▁
##    3    ▆▇▁▁▁▁▁▁
##   15.1  ▇▂▁▁▁▁▁▁
##   15.1  ▇▂▁▁▁▁▁▁
##    9.3  ▆▇▁▁▁▁▁▁
##   15.6  ▁▁▁▃▇▂▁▁
##   12.5  ▁▁▁▂▇▃▁▁
##   11.6  ▁▁▁▁▇▃▁▁
##   12.1  ▁▁▂▇▃▁▁▁
##    5.8  ▁▁▁▃▇▃▁▁
##    5.5  ▁▁▁▁▂▇▂▁
##   10.3  ▃▇▆▃▂▁▁▁
##   10.3  ▂▅▇▂▁▁▁▁
##   10.1  ▁▃▇▂▁▁▁▁
##  100    ▆▇▁▁▁▁▁▁
##   45.1  ▁▅▇▃▂▁▁▁
##   45.1  ▁▃▇▂▁▁▁▁
##    6    ▇▇▅▅▂▁▁▁
##    6.6  ▅▅▇▂▁▁▁▁
##    6.6  ▃▃▇▂▁▁▁▁
##    1    ▁▁▁▇▇▁▁▁
##    0.74 ▁▁▁▁▂▇▁▁
##    0.71 ▁▁▁▂▇▂▁▁
##    1    ▁▁▁▇▅▁▁▁
##    0.74 ▁▁▁▁▃▇▁▁
##    0.86 ▁▁▁▁▇▁▁▁
##    5.1  ▇▃▃▁▁▁▁▁
##    5.1  ▇▇▃▁▁▁▁▁
##    3.6  ▆▇▂▂▁▁▁▁
##    1    ▂▁▇▃▁▁▁▁
##    1    ▁▁▇▂▁▁▁▁
##    1    ▁▁▇▂▁▁▁▁
##   24    ▃▇▆▃▃▂▁▁
##   22    ▂▅▇▃▂▂▁▁
##   22.2  ▂▃▇▂▂▁▁▁
##   18.6  ▅▇▆▃▂▁▁▁
##   20.4  ▃▆▇▃▂▁▁▁
##   20.4  ▂▃▇▂▁▁▁▁
##    1    ▁▁▁▇▂▁▁▁
##    0.74 ▁▁▁▁▇▃▁▁
##    0.71 ▁▁▂▇▂▁▁▁
##    9.2  ▇▅▂▁▁▁▁▁
##    8.8  ▆▇▂▁▁▁▁▁
##    8.8  ▃▇▂▁▁▁▁▁
##    1    ▁▁▁▁▂▆▇▂
##    1    ▁▁▁▁▂▇▇▁
##    1    ▁▁▁▁▁▇▆▁
##    1.5  ▅▇▂▁▁▁▁▁
##    1.22 ▂▇▃▁▁▁▁▁
##    0.94 ▁▃▇▂▁▁▁▁
##   83    ▁▁▁▂▂▂▅▇
##   82    ▁▁▁▁▁▂▇▇
##   83    ▁▁▁▁▁▂▇▆
##   82    ▇▂▂▂▁▁▂▃
##   82    ▇▂▂▆▁▂▂▅
##   82    ▅▂▁▇▁▁▂▃
##   42    ▁▃▆▇▇▇▆▁
##   38.7  ▁▂▂▃▇▃▅▂
##   38.7  ▁▁▂▂▇▂▂▂
##   12.4  ▁▁▁▃▇▂▁▁
##   12.4  ▁▁▁▃▇▁▁▁
##    9.6  ▁▁▁▁▇▃▁▁
##    5.3  ▇▅▂▁▁▁▁▁
##    5.4  ▆▇▂▁▁▁▁▁
##    5.5  ▅▇▂▁▁▁▁▁
##   21.9  ▇▅▂▂▂▁▁▁
##   21.9  ▇▇▂▂▂▁▁▁
##   46.8  ▇▂▁▁▁▁▁▁
##   13.8  ▁▇▆▂▁▁▁▁
##   14.8  ▁▆▇▂▁▁▁▁
##   14.8  ▁▅▇▂▁▁▁▁
##   40.8  ▁▁▁▇▆▁▁▁
##   31.5  ▁▁▁▂▇▃▁▁
##   31.6  ▁▁▁▂▇▂▁▁
##    6    ▁▅▇▆▁▁▁▁
##    4.1  ▁▁▂▇▅▂▁▁
##    4.1  ▁▁▂▇▃▂▁▁
##    2.3  ▃▇▇▅▂▁▁▁
##    2.5  ▃▅▇▂▁▁▁▁
##    2.5  ▂▃▇▂▁▁▁▁
##   11.1  ▆▇▁▁▁▁▁▁
##    8.2  ▂▇▂▁▁▁▁▁
##    7.9  ▁▇▂▁▁▁▁▁
##    5.7  ▆▇▃▂▁▁▁▁
##    5.7  ▃▇▂▂▁▁▁▁
##    4.4  ▁▃▇▂▁▁▁▁
##   43.6  ▁▆▇▂▁▁▁▁
##   29.2  ▁▁▃▇▃▁▁▁
##   37.5  ▁▁▇▂▁▁▁▁
##   27.6  ▁▃▇▂▂▁▁▁
##    1    ▁▁▁▂▇▁▁▁
##    0.73 ▁▁▁▁▁▇▅▁
##    0.7  ▁▁▁▁▃▇▁▁
##   41.7  ▁▁▅▇▅▂▁▁
##   38.4  ▁▁▂▆▇▂▁▁
##   38.4  ▁▂▃▇▂▁▁▁
##   12.4  ▇▆▁▁▁▁▁▁
##    9.8  ▃▇▂▁▁▁▁▁
##    9.8  ▂▇▂▁▁▁▁▁
##   19.2  ▂▆▇▂▁▁▁▁
##   19.3  ▂▅▇▂▁▁▁▁
##    0.43 ▁▁▁▅▇▂▁▁
##    0.32 ▁▁▁▁▅▇▁▁
##    0.32 ▁▁▁▁▃▇▁▁
# Tuning grid for the random forest: every second integer in a +/- 5 window
# around one third of the number of predictors is tried as mtry; splitrule
# and min.node.size are held fixed.
# (Single-value alternative: mtry = round(ncol(x_mean_train) / 3))
# x_mean_train is a data frame, so ncol() is its number of columns.
n_predictors <- ncol(x_mean_train)
rfGrid <- expand.grid(
  mtry = seq(
    from = round(n_predictors / 3 - 5),
    to = round(n_predictors / 3 + 5),
    by = 2
  ),
  splitrule = "variance",
  min.node.size = 5
)

# Fit the forest through caret's train() with the ranger engine.
# The RNG seed is fixed right before training for reproducibility.
set.seed(1234)
cv_control <- trainControl(method = "cv")
rf <- train(
  x = x_mean_train,
  y = y_train,
  method = "ranger",
  tuneGrid = rfGrid,
  trControl = cv_control,
  num.trees = 500,
  importance = "permutation"
)

# Plot the fitted train object (performance across the tuning grid)
plot(rf)

# Predicted outcome for the held-out test features
rfPred <- predict(rf, newdata = x_mean_test)

# Test-set comparison table: one row per player-season with the observed
# outcome (y), the prediction (y_hat) and the signed error (y_hat - y).
# Year is the season following Year_t1.
predictions <- data.frame(
  Player = df_pred$Player,
  Year = df_pred$Year_t1 + 1,
  Pos = df_pred$Pos_t1,
  y = y_test,
  y_hat = rfPred
)
predictions <- mutate(predictions, error = y_hat - y)
# Factor columns are converted to plain character
predictions <- mutate_if(predictions, is.factor, as.character)

# Same comparison table for the training rows; predict(rf) without newdata
# is used for y_hat. local() keeps the row subset out of the workspace.
predictions_training <- local({
  train_meta <- df[trainingRow, ]
  fitted_tbl <- data.frame(
    Player = train_meta$Player,
    Year = train_meta$Year_t1 + 1,
    Pos = train_meta$Pos_t1,
    y = y_train,
    y_hat = predict(rf)
  )
  mutate(fitted_tbl, error = y_hat - y)
})

Predictions Player Performance

Test data

The test data has 173 observations with mean y of 0.76 and standard deviation of 1.43. The RMSE is 0.7388318; the MAE is 0.4995466.

Training data

The training data has 988 observations with mean y of 0.77 and standard deviation of 1.46.

2018/19 season

Teams

# Round the reported metrics to two decimals for display only;
# predictions_2019 itself is left untouched.
predictions_2019_rounded <- mutate(
  predictions_2019,
  vorp = round(vorp, digits = 2),
  Wins_pred_2019 = round(Wins_pred_2019, digits = 2),
  Error_Pred_Vegas = round(Error_Pred_Vegas, digits = 2),
  Error_Pelton_Vegas = round(Error_Pelton_Vegas, digits = 2),
  Sum_Error = round(Sum_Error, digits = 2)
)

# Render as a table widget with horizontal scrolling enabled
datatable(predictions_2019_rounded, options = list(scrollX = TRUE))

Player growth